{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2a8f7af4cd234612b9b28d11086930be",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from huggingface_hub import notebook_login\n",
    "from datasets import load_dataset\n",
    "\n",
    "notebook_login()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "translated_dataset_hf = load_dataset(\"csv\",data_files=\"./final_combined.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DatasetDict({\n",
       "    train: Dataset({\n",
       "        features: ['original_instruction', 'original_output', 'translated_instruction', 'translated_output'],\n",
       "        num_rows: 389608\n",
       "    })\n",
       "})"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "translated_dataset_hf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "df = pd.read_csv('./final_combined.csv')\n",
    "# print(df.to_string()) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>original_instruction</th>\n",
       "      <th>original_output</th>\n",
       "      <th>translated_instruction</th>\n",
       "      <th>translated_output</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>389543</td>\n",
       "      <td>389523</td>\n",
       "      <td>389608</td>\n",
       "      <td>389608</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>unique</th>\n",
       "      <td>362776</td>\n",
       "      <td>333366</td>\n",
       "      <td>328492</td>\n",
       "      <td>334473</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>top</th>\n",
       "      <td>I don't care.</td>\n",
       "      <td>Negative</td>\n",
       "      <td>ಸಾರ್ವಜನಿಕವಾಗಿ ಮಾತನಾಡುವ ನನ್ನ ಕೌಶಲ್ಯವನ್ನು ನಾನು ಹ...</td>\n",
       "      <td>?</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>freq</th>\n",
       "      <td>58</td>\n",
       "      <td>90</td>\n",
       "      <td>70</td>\n",
       "      <td>664</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       original_instruction original_output  \\\n",
       "count                389543          389523   \n",
       "unique               362776          333366   \n",
       "top           I don't care.        Negative   \n",
       "freq                     58              90   \n",
       "\n",
       "                                   translated_instruction translated_output  \n",
       "count                                              389608            389608  \n",
       "unique                                             328492            334473  \n",
       "top     ಸಾರ್ವಜನಿಕವಾಗಿ ಮಾತನಾಡುವ ನನ್ನ ಕೌಶಲ್ಯವನ್ನು ನಾನು ಹ...                 ?  \n",
       "freq                                                   70               664  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                                              instruction   output translation\n",
      "count                                             1558302  1558262     1558432\n",
      "unique                                             691261   667235           4\n",
      "top     ಸಾರ್ವಜನಿಕವಾಗಿ ಮಾತನಾಡುವ ನನ್ನ ಕೌಶಲ್ಯವನ್ನು ನಾನು ಹ...        ?    en_to_en\n",
      "freq                                                  140     1328      389608\n"
     ]
    }
   ],
   "source": [
    "def build_section(frame, instruction_col, output_col, translation):\n",
    "    \"\"\"Return an (instruction, output, translation) frame built from two columns of `frame`.\n",
    "\n",
    "    `instruction_col` and `output_col` are selected and renamed to 'instruction'\n",
    "    and 'output'; `translation` is the constant tag stored in the new\n",
    "    'translation' column.\n",
    "    \"\"\"\n",
    "    section = frame[[instruction_col, output_col]].rename(\n",
    "        columns={instruction_col: 'instruction', output_col: 'output'}\n",
    "    )\n",
    "    section['translation'] = translation\n",
    "    return section\n",
    "\n",
    "\n",
    "# (instruction column, output column, translation tag).\n",
    "# The order here is the row order of final_df: en_to_en, kn_to_kn, en_to_kn, kn_to_en.\n",
    "SECTION_SPECS = [\n",
    "    ('original_instruction', 'original_output', 'en_to_en'),\n",
    "    ('translated_instruction', 'translated_output', 'kn_to_kn'),\n",
    "    ('original_instruction', 'translated_output', 'en_to_kn'),\n",
    "    ('translated_instruction', 'original_output', 'kn_to_en'),\n",
    "]\n",
    "\n",
    "# One section per language pairing, built by the same helper instead of four copied stanzas.\n",
    "section1, section2, section3, section4 = (\n",
    "    build_section(df, instruction_col, output_col, translation)\n",
    "    for instruction_col, output_col, translation in SECTION_SPECS\n",
    ")\n",
    "\n",
    "# Concatenate all sections to create the final DataFrame\n",
    "final_df = pd.concat([section1, section2, section3, section4], ignore_index=True)\n",
    "\n",
    "# NOTE(review): df.describe() reports fewer non-null values in the original_* columns\n",
    "# than in the translated_* columns, so some rows here carry NaN. Confirm whether they\n",
    "# should be dropped or whether the CSV should be read with keep_default_na=False.\n",
    "print(final_df.describe())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                                               instruction  \\\n",
      "779216   Give three tips for staying healthy. answer in...   \n",
      "779217   answer in kannada What are the three primary c...   \n",
      "779218   respond to the following in kannada Describe t...   \n",
      "779219   How can we reduce air pollution? answer in kan...   \n",
      "779220   answer in kannada Describe a time when you had...   \n",
      "...                                                    ...   \n",
      "1168819  answer in kannada Compare and contrast regulat...   \n",
      "1168820  Reorganize the following paragraph into a cohe...   \n",
      "1168821  answer in kannada Given a list of cities, clas...   \n",
      "1168822  What is the meaning of the phrase \"never judge...   \n",
      "1168823  Edit the following sentence so it reads correc...   \n",
      "\n",
      "                                                    output translation  \n",
      "779216   1.Eat ಸಮತೋಲಿತ ಆಹಾರ ಮತ್ತು ಸಾಕಷ್ಟು ಹಣ್ಣುಗಳು ಮತ್ತ...    en_to_kn  \n",
      "779217       ಮೂರು ಪ್ರಾಥಮಿಕ ಬಣ್ಣಗಳು ಕೆಂಪು, ನೀಲಿ ಮತ್ತು ಹಳದಿ.    en_to_kn  \n",
      "779218   ಪರಮಾಣು ಪ್ರೋಟಾನ್ಗಳು ಮತ್ತು ನ್ಯೂಟ್ರಾನ್ಗಳನ್ನು ಹೊಂದ...    en_to_kn  \n",
      "779219   ವಾಯುಮಾಲಿನ್ಯವನ್ನು ಕಡಿಮೆ ಮಾಡಲು ಹಲವಾರು ಮಾರ್ಗಗಳಿವೆ...    en_to_kn  \n",
      "779220   ನಿರ್ಮಾಣ ಕಂಪನಿಯೊಂದರಲ್ಲಿ ಯೋಜನಾ ವ್ಯವಸ್ಥಾಪಕರಾಗಿ ಕೆ...    en_to_kn  \n",
      "...                                                    ...         ...  \n",
      "1168819  ಕೈಗಾರಿಕೆಗಳನ್ನು ನಿರ್ವಹಿಸಲು ನಿಯಂತ್ರಣ ಮತ್ತು ನಿಯಂತ...    en_to_kn  \n",
      "1168820  ಸಾಮಾಜಿಕ ಮಾಧ್ಯಮವು ನಾವು ಸಂವಹನ ನಡೆಸುವ ವಿಧಾನದಲ್ಲಿ ...    en_to_kn  \n",
      "1168821  ಸಿಡ್ನಿ ಅಭಿವೃದ್ಧಿ ಹೊಂದಿದ ನಗರವಾಗಿದೆ. ರಿಯೋ ಡಿ ಜನೈ...    en_to_kn  \n",
      "1168822  \"ಪುಸ್ತಕವನ್ನು ಅದರ ಮುಖಪುಟದಿಂದ ಎಂದಿಗೂ ನಿರ್ಣಯಿಸಬೇಡ...    en_to_kn  \n",
      "1168823                ನಾನು ಬೆಳಗಿನ ಉಪಾಹಾರವನ್ನು ಸೇವಿಸಿದ್ದೆ.    en_to_kn  \n",
      "\n",
      "[389608 rows x 3 columns]\n",
      "                                               instruction  \\\n",
      "0                     Give three tips for staying healthy.   \n",
      "1                       What are the three primary colors?   \n",
      "2                       Describe the structure of an atom.   \n",
      "3                         How can we reduce air pollution?   \n",
      "4        Describe a time when you had to make a difficu...   \n",
      "...                                                    ...   \n",
      "1558427  ನಿಯಂತ್ರಣ ಮತ್ತು ನಿಯಂತ್ರಣವನ್ನು ಹೋಲಿಸಿ ಮತ್ತು ವ್ಯತ...   \n",
      "1558428  ಈ ಕೆಳಗಿನ ಪ್ಯಾರಾಗ್ರಾಫ್ ಅನ್ನು ಒಗ್ಗೂಡಿಸುವ ರಚನೆಯಾಗ...   \n",
      "1558429  ನಗರಗಳ ಪಟ್ಟಿಯನ್ನು ನೀಡಿದರೆ, ಪ್ರತಿ ನಗರವನ್ನು ಅಭಿವೃ...   \n",
      "1558430  \"ಎಂದಿಗೂ ಪುಸ್ತಕವನ್ನು ಅದರ ಮುಖಪುಟದಿಂದ ನಿರ್ಣಯಿಸಬೇಡ...   \n",
      "1558431  ಈ ಕೆಳಗಿನ ವಾಕ್ಯವು ಸರಿಯಾಗಿ ಓದುವಂತೆ ಅದನ್ನು ಸಂಪಾದಿ...   \n",
      "\n",
      "                                                    output translation  \n",
      "0        1.Eat a balanced diet and make sure to include...    en_to_en  \n",
      "1        The three primary colors are red, blue, and ye...    en_to_en  \n",
      "2        An atom is made up of a nucleus, which contain...    en_to_en  \n",
      "3        There are a number of ways to reduce air pollu...    en_to_en  \n",
      "4        I had to make a difficult decision when I was ...    en_to_en  \n",
      "...                                                    ...         ...  \n",
      "1558427  Regulation and deregulation are two contrastin...    kn_to_en  \n",
      "1558428  Social media has revolutionized the way we com...    kn_to_en  \n",
      "1558429  Sydney is a developed city.\\nRio de Janeiro is...    kn_to_en  \n",
      "1558430  The phrase \"never judge a book by its cover\" i...    kn_to_en  \n",
      "1558431                          I had eaten my breakfast.    kn_to_en  \n",
      "\n",
      "[1558432 rows x 3 columns]\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Adithya\\AppData\\Local\\Temp\\ipykernel_13048\\2990095260.py:23: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  en_to_kn_rows['instruction'] = en_to_kn_rows.apply(lambda row: f\"{get_random_phrase()} {row['instruction']}\" if random.choice([True, False]) else f\"{row['instruction']} {get_random_phrase()}\", axis=1)\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import random\n",
    "\n",
    "# Requires 'final_df' from the previous cell.\n",
    "\n",
    "# Seed the RNG so the phrase choice and placement are reproducible on Restart & Run All.\n",
    "SEED = 42\n",
    "random.seed(SEED)\n",
    "\n",
    "# Select the 'en_to_kn' rows (English instruction, Kannada output).\n",
    "# .copy() gives an independent frame, so the column assignment below no longer\n",
    "# triggers SettingWithCopyWarning.\n",
    "en_to_kn_rows = final_df[final_df['translation'] == 'en_to_kn'].copy()\n",
    "\n",
    "# Phrases to be added\n",
    "phrases = [\n",
    "    \"respond to the following in kannada\",\n",
    "    \"answer in kannada\",\n",
    "    \"tell it to me in kannada\",\n",
    "    \"respond in kannada\",\n",
    "    \"reply in kannada\",\n",
    "]\n",
    "\n",
    "# Function to randomly choose a phrase\n",
    "def get_random_phrase():\n",
    "    \"\"\"Return one entry of `phrases`, picked with random.choice.\"\"\"\n",
    "    return random.choice(phrases)\n",
    "\n",
    "# Put a random phrase either before or after each instruction (random.choice([True, False])).\n",
    "# NOTE(review): a NaN instruction is formatted by the f-string as the text 'nan'.\n",
    "en_to_kn_rows['instruction'] = en_to_kn_rows.apply(lambda row: f\"{get_random_phrase()} {row['instruction']}\" if random.choice([True, False]) else f\"{row['instruction']} {get_random_phrase()}\", axis=1)\n",
    "\n",
    "# Print the modified DataFrame\n",
    "print(en_to_kn_rows)\n",
    "\n",
    "# Write the modified instructions back into final_df.\n",
    "# NOTE(review): final_df is updated in place, so re-running this cell adds a second phrase\n",
    "# to rows that were already modified; rebuild final_df before re-running.\n",
    "final_df.loc[en_to_kn_rows.index, 'instruction'] = en_to_kn_rows['instruction']\n",
    "\n",
    "# Print the updated DataFrame\n",
    "print(final_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                                               instruction  \\\n",
      "1168824  answer in English ಆರೋಗ್ಯವಾಗಿರಲು ಮೂರು ಸಲಹೆಗಳನ್ನ...   \n",
      "1168825  respond in englishreply in english ಮೂರು ಪ್ರಾಥಮ...   \n",
      "1168826  ಪರಮಾಣುವಿನ ರಚನೆಯನ್ನು ವಿವರಿಸಿ. respond to the fo...   \n",
      "1168827  respond in englishreply in english ವಾಯು ಮಾಲಿನ್...   \n",
      "1168828  ನೀವು ಕಠಿಣ ನಿರ್ಧಾರವನ್ನು ತೆಗೆದುಕೊಳ್ಳಬೇಕಾದ ಸಮಯವನ್...   \n",
      "...                                                    ...   \n",
      "1558427  respond in englishreply in english ನಿಯಂತ್ರಣ ಮತ...   \n",
      "1558428  answer in English ಈ ಕೆಳಗಿನ ಪ್ಯಾರಾಗ್ರಾಫ್ ಅನ್ನು ...   \n",
      "1558429  answer in English ನಗರಗಳ ಪಟ್ಟಿಯನ್ನು ನೀಡಿದರೆ, ಪ್...   \n",
      "1558430  \"ಎಂದಿಗೂ ಪುಸ್ತಕವನ್ನು ಅದರ ಮುಖಪುಟದಿಂದ ನಿರ್ಣಯಿಸಬೇಡ...   \n",
      "1558431  ಈ ಕೆಳಗಿನ ವಾಕ್ಯವು ಸರಿಯಾಗಿ ಓದುವಂತೆ ಅದನ್ನು ಸಂಪಾದಿ...   \n",
      "\n",
      "                                                    output translation  \n",
      "1168824  1.Eat a balanced diet and make sure to include...    kn_to_en  \n",
      "1168825  The three primary colors are red, blue, and ye...    kn_to_en  \n",
      "1168826  An atom is made up of a nucleus, which contain...    kn_to_en  \n",
      "1168827  There are a number of ways to reduce air pollu...    kn_to_en  \n",
      "1168828  I had to make a difficult decision when I was ...    kn_to_en  \n",
      "...                                                    ...         ...  \n",
      "1558427  Regulation and deregulation are two contrastin...    kn_to_en  \n",
      "1558428  Social media has revolutionized the way we com...    kn_to_en  \n",
      "1558429  Sydney is a developed city.\\nRio de Janeiro is...    kn_to_en  \n",
      "1558430  The phrase \"never judge a book by its cover\" i...    kn_to_en  \n",
      "1558431                          I had eaten my breakfast.    kn_to_en  \n",
      "\n",
      "[389608 rows x 3 columns]\n",
      "                                               instruction  \\\n",
      "0                     Give three tips for staying healthy.   \n",
      "1                       What are the three primary colors?   \n",
      "2                       Describe the structure of an atom.   \n",
      "3                         How can we reduce air pollution?   \n",
      "4        Describe a time when you had to make a difficu...   \n",
      "...                                                    ...   \n",
      "1558427  respond in englishreply in english ನಿಯಂತ್ರಣ ಮತ...   \n",
      "1558428  answer in English ಈ ಕೆಳಗಿನ ಪ್ಯಾರಾಗ್ರಾಫ್ ಅನ್ನು ...   \n",
      "1558429  answer in English ನಗರಗಳ ಪಟ್ಟಿಯನ್ನು ನೀಡಿದರೆ, ಪ್...   \n",
      "1558430  \"ಎಂದಿಗೂ ಪುಸ್ತಕವನ್ನು ಅದರ ಮುಖಪುಟದಿಂದ ನಿರ್ಣಯಿಸಬೇಡ...   \n",
      "1558431  ಈ ಕೆಳಗಿನ ವಾಕ್ಯವು ಸರಿಯಾಗಿ ಓದುವಂತೆ ಅದನ್ನು ಸಂಪಾದಿ...   \n",
      "\n",
      "                                                    output translation  \n",
      "0        1.Eat a balanced diet and make sure to include...    en_to_en  \n",
      "1        The three primary colors are red, blue, and ye...    en_to_en  \n",
      "2        An atom is made up of a nucleus, which contain...    en_to_en  \n",
      "3        There are a number of ways to reduce air pollu...    en_to_en  \n",
      "4        I had to make a difficult decision when I was ...    en_to_en  \n",
      "...                                                    ...         ...  \n",
      "1558427  Regulation and deregulation are two contrastin...    kn_to_en  \n",
      "1558428  Social media has revolutionized the way we com...    kn_to_en  \n",
      "1558429  Sydney is a developed city.\\nRio de Janeiro is...    kn_to_en  \n",
      "1558430  The phrase \"never judge a book by its cover\" i...    kn_to_en  \n",
      "1558431                          I had eaten my breakfast.    kn_to_en  \n",
      "\n",
      "[1558432 rows x 3 columns]\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Adithya\\AppData\\Local\\Temp\\ipykernel_13048\\1086833397.py:18: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  kn_to_en_rows['instruction'] = kn_to_en_rows.apply(lambda row: f\"{get_random_english_phrase()} {row['instruction']}\" if random.choice([True, False]) else f\"{row['instruction']} {get_random_english_phrase()}\", axis=1)\n"
     ]
    }
   ],
   "source": [
    "# Select the 'kn_to_en' rows (Kannada instruction, English output).\n",
    "# .copy() gives an independent frame, so the column assignment below no longer\n",
    "# triggers SettingWithCopyWarning.\n",
    "kn_to_en_rows = final_df[final_df['translation'] == 'kn_to_en'].copy()\n",
    "\n",
    "# Phrases to be added for English response.\n",
    "# Every entry needs its own trailing comma: Python silently concatenates adjacent\n",
    "# string literals, which would merge two phrases into one.\n",
    "english_phrases = [\n",
    "    \"respond to the following in English\",\n",
    "    \"answer in English\",\n",
    "    \"tell it to me in English\",\n",
    "    \"respond in english\",\n",
    "    \"reply in english\",\n",
    "]\n",
    "\n",
    "# Function to randomly choose an English phrase\n",
    "def get_random_english_phrase():\n",
    "    \"\"\"Return one entry of `english_phrases`, picked with random.choice.\"\"\"\n",
    "    return random.choice(english_phrases)\n",
    "\n",
    "# Put a random phrase either before or after each instruction (random.choice([True, False])).\n",
    "kn_to_en_rows['instruction'] = kn_to_en_rows.apply(lambda row: f\"{get_random_english_phrase()} {row['instruction']}\" if random.choice([True, False]) else f\"{row['instruction']} {get_random_english_phrase()}\", axis=1)\n",
    "\n",
    "# Print the modified DataFrame\n",
    "print(kn_to_en_rows)\n",
    "\n",
    "# Write the modified instructions back into final_df.\n",
    "# NOTE(review): final_df is updated in place, so re-running this cell adds a second phrase\n",
    "# to rows that were already modified; rebuild final_df before re-running.\n",
    "final_df.loc[kn_to_en_rows.index, 'instruction'] = kn_to_en_rows['instruction']\n",
    "\n",
    "# Print the updated DataFrame\n",
    "print(final_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Render each (instruction, output) pair into the chat template stored in the 'text' column.\n",
    "final_df['text'] = [\n",
    "    f\"<|user|>{instruction}<|endoftext|><|assistant|>{output}<|endoftext|>\"\n",
    "    for instruction, output in zip(final_df['instruction'], final_df['output'])\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "final_df.to_csv('./bilingual_instruct.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "fd2878130a1c409fa469a641a50618e6",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating train split: 0 examples [00:00, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "bilingual_dataset_hf = load_dataset(\"csv\",data_files=\"./bilingual_instruct.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DatasetDict({\n",
       "    train: Dataset({\n",
       "        features: ['instruction', 'output', 'translation', 'text'],\n",
       "        num_rows: 1558432\n",
       "    })\n",
       "})"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bilingual_dataset_hf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "35a9f9a6d089485892103d2295b26648",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Uploading the dataset shards:   0%|          | 0/5 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e12d1131abba4471b3bd09e4e47d40c6",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating parquet from Arrow format:   0%|          | 0/312 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "7e1101401c0b46bd8a2d28f90d39541a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating parquet from Arrow format:   0%|          | 0/312 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "77a7fcbaa651469fad4c76851bdf9b2b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating parquet from Arrow format:   0%|          | 0/312 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "4b75e91ddc704b63bb9fdad06ddaaacb",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating parquet from Arrow format:   0%|          | 0/312 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c3419838105041618d95eb1b059cfa45",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating parquet from Arrow format:   0%|          | 0/312 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "CommitInfo(commit_url='https://huggingface.co/datasets/CognitiveLab/bilingual_instruct_kn/commit/554cbd730938f7336d68fd9758660f36e12afac4', commit_message='Upload dataset', commit_description='', oid='554cbd730938f7336d68fd9758660f36e12afac4', pr_url=None, pr_revision=None, pr_num=None)"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bilingual_dataset_hf.push_to_hub(\"CognitiveLab/bilingual_instruct_kn\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "LLM-venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
